-------------------------------------------------------------------------------------
      name:  <unnamed>
       log:  /Users/samharper/OneDrive - McGill University/usa-decomp-2015-race.log
  log type:  text
 opened on:  16 Feb 2017, 11:27:07

. 
. //  program: usa-decomp-2015-race.do
. //  task:    decompose life expectancy by sex and race over time        
. //  input:   allcause and cause-specific mortality
. //  output:  none
. //  project: life expectancy    
. //  author:  sam harper \ 16feb2017
. 
. 
. // #0
. // program setup
. 
. version 14

. set linesize 80

. clear all

. macro drop _all

. 
. 
. 
. // #1
. // load the mortality data, downloaded from CDC WONDER database
. 
. import delimited "usa-decomp-2015-race-data.txt", ///
>   encoding(ISO-8859-1)clear
(14 vars, 12,041 obs)

. 
. * drop extra rows for Notes from CDC WONDER
. drop if year==.
(73 observations deleted)

. 
. * fix up variable names and labels
. encode gender, gen(sex)

. encode race, gen(racebw)

. drop race

. rename racebw race

. label define race 1 "NH Black" 2 "NH White", modify

. label values race race

. 
. replace tenyearagegroups="01-04 years" if tenyearagegroups=="1-4 years"
(1,088 real changes made)

. replace tenyearagegroups="05-14 years" if tenyearagegroups=="5-14 years"
(1,088 real changes made)

. replace tenyearagegroups="00-01 years" if tenyearagegroups=="< 1 year"
(1,088 real changes made)

. encode tenyearagegroups, gen(age)

. rename (deaths population icd10113causelist) (count pop icdcode)

. 
. * extract numeric cause-of-death codes from CDC's 113-cause list
. gen cod113s=substr(icd10113causelistcode,7,3)

. destring cod113s, gen(cod113)
cod113s: all characters numeric; cod113 generated as int

. 
. * recode to a small number of causes (arbitrary)
. recode cod113 (16 = 6 "HIV") (19 = 2 "Cancers") (46 = 3 "Diabetes") ///
>   (52 = 4 "Alzheimer's") (53 = 1 "CVDs") (76 = 5 "Flu/pneumonia") ///
>   (82 = 7 "Chronic Resp dx") (93 = 8 "Liver dx") (97 = 9 "Kidney dx") ///
>   (114 = 10 "MV crashes") (122 = 11 "Poisoning") ///
>   (124 = 12 "Suicide") (127 = 13 "Homicide") ///
>   (1/15 17 18 44 45 47 50 51 79 87/92 96 102/105 108/111 115 116 118/121 ///
>   123 130 131 134/136 = 14 "Residual") ///
>   (20/43 48 49 54/75 77 78 80 81 83/86 94 95 98/101 106 107 112 113 117 ///
>   125 126 128 129 132 133 = .), gen(cod14)
(11880 differences between cod113 and cod14)

. 
. * sum deaths (and keep population) by sex, race, age, cause and year
. collapse (sum) count (max) pop, by(sex race age cod14 year)

. 
. * drop extra cells for causes subsumed within broad categories
. * to avoid double counting
. drop if cod14==.
(88 observations deleted)

. drop if year==.
(0 observations deleted)

. 
. 
. gen rate = count / pop * 100000

. label var rate "death rate"

. label var count "no. of deaths"

. label var pop "mid-year population"

. 
. * save this dataset for life expectancy calculations (#3)
. save usa-decomp-2015-race, replace
file usa-decomp-2015-race.dta saved

. 
. 
. 
. // #2
. // calculate some age-adjusted death rates
. use "age19std.dta", clear

. label list age19
age19:
           1 00 years
           2 01-04 years
           3 05-09 years
           4 10-14 years
           5 15-19 years
           6 20-24 years
           7 25-29 years
           8 30-34 years
           9 35-39 years
          10 40-44 years
          11 45-49 years
          12 50-54 years
          13 55-59 years
          14 60-64 years
          15 65-69 years
          16 70-74 years
          17 75-79 years
          18 80-84 years
          19 85+ years

. recode age19 (3/4=3) (5/6=4) (7/8=5) (9/10=6) (11/12=7) (13/14=8) ///
>   (15/16=9) (17/18=10) (19=11), gen(age)
(192 differences between age19 and age)

. keep if std==9 // 2000 US Std Million
(209 observations deleted)

. collapse (sum) stdcount, by(age)

. egen tstdcount=sum(stdcount)

. gen stdwt=stdcount/tstdcount

. drop tstdcount

. label var stdwt "US 2000 standard weight"

. 
. merge 1:m age using usa-decomp-2015-race

    Result                           # of obs.
    -----------------------------------------
    not matched                             0
    matched                             1,232  (_merge==3)
    -----------------------------------------

. drop _merge

. 
. *create age-adjusted rates, all ages
. * multiply crude age-specific rate by standard weight
. gen aarate=rate*stdwt

. 
. * sum over age categories, by race, sex, cause, and year
. collapse (sum) count aarate pop, by(sex race cod year)

. 
. * crude rate
. gen crate=count/pop*100000

. label var crate "Crude rate per 100,000 population"

. label var aarate "Age-adjusted rate per 100,000 population"

. 
. * Appendix Table 1
. table cod year race, c(mean crate) by(sex) format(%6.1f)

----------------------------------------------
Gender and      |        Race and Year        
RECODE of       | - NH Black -    - NH White -
cod113          |  2014   2015     2014   2015
----------------+-----------------------------
Female          |
           CVDs | 223.4  228.7    309.0  319.0
        Cancers | 155.8  154.9    215.1  215.1
       Diabetes |  32.0   31.6     22.7   23.3
    Alzheimer's |  21.7   27.2     54.6   63.4
  Flu/pneumonia |  13.0   13.0     22.3   23.8
            HIV |   5.6    5.3      0.3    0.3
Chronic Resp dx |  22.4   23.4     67.7   71.9
       Liver dx |   5.3    5.7      9.8   10.6
      Kidney dx |  20.9   21.7     16.5   16.8
     MV crashes |   5.9    6.4      7.0    7.3
      Poisoning |   6.6    7.1     11.8   12.8
        Suicide |   2.1    2.1      7.9    8.3
       Homicide |   4.9    5.2      1.6    1.7
       Residual | 171.8  171.9    266.9  266.3
----------------+-----------------------------
Male            |
           CVDs | 246.7  253.1    322.3  330.5
        Cancers | 174.7  174.0    249.3  249.7
       Diabetes |  32.2   34.0     29.0   30.1
    Alzheimer's |   9.2   10.8     24.6   28.9
  Flu/pneumonia |  13.4   13.6     20.8   21.2
            HIV |  11.7   11.2      1.8    1.7
Chronic Resp dx |  25.0   26.2     61.0   63.4
       Liver dx |   9.5    9.7     18.0   18.7
      Kidney dx |  20.2   21.8     18.0   18.5
     MV crashes |  17.7   19.7     16.9   17.9
      Poisoning |  13.3   16.3     21.3   24.2
        Suicide |   9.5    9.8     27.6   28.3
       Homicide |  33.9   39.5      3.3    3.5
       Residual | 165.5  169.1    232.3  237.1
----------------------------------------------

. table cod year race, c(mean aarate) by(sex) format(%6.1f)

----------------------------------------------
Gender and      |        Race and Year        
RECODE of       | - NH Black -    - NH White -
cod113          |  2014   2015     2014   2015
----------------+-----------------------------
Female          |
           CVDs | 239.2  239.9    181.0  185.1
        Cancers | 160.8  156.6    142.7  140.6
       Diabetes |  34.0   32.8     14.6   14.9
    Alzheimer's |  24.5   30.0     29.9   34.4
  Flu/pneumonia |  13.9   13.7     13.4   13.8
            HIV |   5.6    5.3      0.3    0.3
Chronic Resp dx |  23.8   24.3     42.5   44.4
       Liver dx |   5.1    5.5      7.4    7.9
      Kidney dx |  22.5   22.9     10.0   10.1
     MV crashes |   5.8    6.3      6.5    6.8
      Poisoning |   6.6    7.1     11.8   13.0
        Suicide |   2.1    2.1      7.5    7.8
       Homicide |   4.9    5.1      1.6    1.7
       Residual | 182.5  179.4    165.5  164.2
----------------+-----------------------------
Male            |
           CVDs | 349.8  350.3    266.4  268.4
        Cancers | 237.5  231.0    197.7  194.3
       Diabetes |  43.9   45.1     23.4   23.8
    Alzheimer's |  18.9   21.3     21.5   24.9
  Flu/pneumonia |  20.2   20.3     17.6   17.6
            HIV |  12.3   11.6      1.5    1.4
Chronic Resp dx |  37.6   38.5     49.7   50.5
       Liver dx |  10.1   10.2     14.1   14.6
      Kidney dx |  29.7   31.4     15.2   15.2
     MV crashes |  18.2   19.9     16.2   17.0
      Poisoning |  13.7   16.7     21.5   24.7
        Suicide |   9.7   10.0     25.8   26.6
       Homicide |  32.3   37.6      3.3    3.6
       Residual | 226.2  225.8    199.0  199.7
----------------------------------------------

. 
. 
. // #3
. // set up for life table calculation
. 
. * sum deaths and population over causes (i.e., ignoring cause of death)
. use usa-decomp-2015-race, clear

. collapse (sum) count (max) pop, by(sex race age year)

. 
. * mortality rate
. gen rate = count / pop * 100000

. label var rate "death rate per 100,000"

. 
. * have a look at the rates by year
. table age year race, c(mean rate) by(sex) format(%7.1f)

--------------------------------------------------
Gender and  |            Race and Year            
Ten-Year    | --- NH Black ---    --- NH White ---
Age Groups  |    2014     2015       2014     2015
------------+-------------------------------------
Female      |
00-01 years |  1013.7   1025.6      450.8    445.4
01-04 years |    34.0     34.1       20.2     20.1
05-14 years |    15.1     15.9        9.8     11.2
15-24 years |    44.2     48.0       37.4     40.0
25-34 years |    93.7     98.1       73.0     78.6
35-44 years |   203.5    202.0      143.2    145.1
45-54 years |   472.2    464.5      325.0    327.0
55-64 years |   999.8    997.5      653.5    661.2
65-74 years |  1924.1   1912.9     1465.6   1475.2
75-84 years |  4447.4   4420.7     4091.0   4115.4
  85+ years | 11854.7  11965.4    13322.4  13717.6
------------+-------------------------------------
Male        |
00-01 years |  1184.7   1214.7      549.6    540.8
01-04 years |    45.3     48.9       24.9     25.5
05-14 years |    22.2     22.4       14.2     14.5
15-24 years |   142.3    158.9       90.6     93.4
25-34 years |   224.6    242.1      155.5    167.7
35-44 years |   323.4    343.2      226.5    237.5
45-54 years |   691.9    700.9      510.2    509.1
55-64 years |  1649.2   1655.7     1084.4   1092.6
65-74 years |  3109.9   3107.9     2172.0   2185.0
75-84 years |  6295.6   6351.2     5509.7   5512.6
  85+ years | 13548.1  13359.3    15322.5  15561.9
--------------------------------------------------

. 
. * group by sex, race and year for faster life table construction
. egen class=group(sex race year)

. 
. * define number of years in age interval (10-year age groups)
. gen n=1 if age==1
(80 missing values generated)

. replace n=4 if age==2
(8 real changes made)

. replace n=10 if age>2
(72 real changes made)

. replace n=1 if age==11
(8 real changes made)

. label var n "no. years in age interval"

. 
. * average fraction of the interval lived by those dying within it:
. * assumed to be 1/2 for every age group except infants, where it is 0.1
. gen ax=0.1 if age==1 // infants
(80 missing values generated)

. replace ax=0.5 if age>1 & age<=11 // all other age groups
(80 real changes made)

. 
. * life table variables
. foreach var in m q p l d L T e var_q v sv var_e se_e {
  2.         qui gen `var'x=.
  3.         }

. 
. * labels
. label var ax "avg time contributed by deaths"

. label var mx "death rate at age x"

. label var qx "probability of death at age x"

. label var px "probability of survival at age x"

. label var lx "number alive at age x"

. label var dx "expected deaths at age x"

. label var Lx "person-years lived in interval"

. label var Tx "time lived beyond age x"

. label var ex "life expectancy at age x"

. label var var_qx "variance of prob. of death"

. label var vx "Chiang formula for variance"

. label var svx "sum of Chiang formula"

. label var var_ex "variance of life expectancy"

. label var se_ex "standard error of life expectancy"

. 
. 
. 
. // #4
. // calculate life table values by group
. 
. sort class age

. 
. qui levelsof class, local(levels)

. * Build an abridged life table for each sex-race-year class. The lag/lead
. * references ([_n-1], [_n+1]) rely on the data being sorted by class and age.
. foreach l of local levels {
  2. 
.         * mortality rate
.         qui replace mx=count/pop if class==`l'
  3.         
.         * probability of death (set to 1 in the open-ended 85+ interval)
.         qui replace qx=n*mx/(1+n*(1-ax)*mx) if class==`l'
  4.         qui replace qx = 1 if age==11 & class==`l'
  5.         
.         * conditional prob of survival
.         qui replace px=1-qx if class==`l'
  6.         
.         * number alive at beginning of interval (radix of 100,000)
.         qui replace lx = 100000 if age==1 & class==`l'
  7.         qui replace lx = lx[_n-1] * px[_n-1] if age>1 & class==`l'
  8.         
.         * Generate deaths by differencing the number of survivors and 
.         * noting that everyone dies in the end
.         qui replace dx = lx - lx[_n+1] if class==`l'
  9.         qui replace dx = lx if age==11 & class==`l'
 10.         
.         * Compute person-years lived in each age group
.         * n for those who survive the age group and nax for those who die
.         qui replace Lx = n * (lx[_n+1] + (ax*dx)) if class==`l'
 11.         qui replace Lx = lx/mx if age==11 & class==`l'
 12.         
. 
.         /* Accumulating from the bottom up is a bit tricky because Stata likes
>  
>         to sum from the top down. You could sort the data from oldest to 
>         youngest, sum, and then sort again. I will subtract the cumulative 
>         sum from the total.*/
.         qui sum Lx if class==`l'
 13.         qui replace Tx = r(sum) - sum(Lx) + Lx if class==`l'
 14.         
.         
.         * Compute life expectancy 
.         *(time lived after each age / survivors to that age)
.         qui replace ex = Tx/lx if class==`l'
 15.         
.         * variance of cond. probability of death; it is 0 in the terminal
.         * interval (qx is fixed at 1 there). The terminal row is selected by
.         * age within the class so that every class is covered.
.         qui replace var_qx = [n^2 * mx*(1-ax*n*mx)] / [pop*(1+(1-ax)*n*mx)^3] 
> if class==`l'
 16.         qui replace var_qx = 0 if age==11 & class==`l'
 17. 
. 
.         * second part of Chiang formula for variance of LE [TODO: add cite];
.         * set to 0 in the terminal interval, whose lead value ex[_n+1] would
.         * otherwise come from the first row of the next class
.         qui replace vx = (lx^2)*[((1-ax)*n+ex[_n+1])^2]*var_qx if class==`l'
 18.         qui replace vx = 0 if age==11 & class==`l'
 19.         
.         * sum of vx from age x to the oldest age group (total minus the
.         * running sum, as for Tx)
.         qui sum vx if class==`l'
 20.         qui replace svx = r(sum) - sum(vx) + vx if class==`l'
 21.         
.         * variance and se of life expectancy
.         qui replace var_ex = svx / lx^2 if class==`l'
 22.         qui replace se_ex = sqrt(var_ex) if class==`l'
 23.         }

. 
. 
. * specify a few formats
. format %6.3f ax ex var_ex se_ex

. format %8.6f mx qx px

. format %9.0fc pop count lx dx Lx Tx

. 
. * table of life expectancies by year
. table race year sex if age==1, c(mean ex) format(%4.2f)

----------------------------------------
          |       Gender and Year       
          | -- Female --    --- Male ---
     Race |  2014   2015     2014   2015
----------+-----------------------------
 NH Black | 78.79  78.76    72.67  72.38
 NH White | 81.32  81.10    76.63  76.45
----------------------------------------

. 
. 
. // #5
. // Decompose by age
. 
. // drop unnecessary variables and reshape the data to wide format with
. // rows for each sex, race and age group and columns for each year
. 
. keep lx Tx Lx mx sex race year age

. reshape wide lx Tx Lx mx, i(sex race age) j(year)
(note: j = 2014 2015)

Data                               long   ->   wide
-----------------------------------------------------------------------------
Number of obs.                       88   ->      44
Number of variables                   8   ->      11
j variable (2 values)              year   ->   (dropped)
xij variables:
                                     lx   ->   lx2014 lx2015
                                     Tx   ->   Tx2014 Tx2015
                                     Lx   ->   Lx2014 Lx2015
                                     mx   ->   mx2014 mx2015
-----------------------------------------------------------------------------

. 
. /* decompose LE by age, using formulas from Arriaga (1984) 
>         Measuring and explaining the change in life expectancies. 
>         Demography 1984;21: 83-96. */
. 
. * generate direct effect
. gen de=(lx2015/100000) * ((Lx2014/lx2014) - (Lx2015/lx2015))

. label var de "direct effect"

. 
. * generate indirect effect and interaction term
. gen ie=(Tx2014[_n+1]/100000) * ///
>   ((lx2015/lx2014) - (lx2015[_n+1]/lx2014[_n+1])) if age!=11
(4 missing values generated)

. replace ie=0 if age==11
(4 real changes made)

. label var ie "indirect effect+interact"

. 
. * total effect (direct + indirect + interaction)
. * contribution in years of LE
. gen te=de+ie

. label var te "diff in life exp"

. 
. drop lx* Lx* Tx*

. 
. * reshape dataset to wide format to calculate total
. reshape wide de ie te mx2014 mx2015, i(sex race) j(age)
(note: j = 1 2 3 4 5 6 7 8 9 10 11)

Data                               long   ->   wide
-----------------------------------------------------------------------------
Number of obs.                       44   ->       4
Number of variables                   8   ->      57
j variable (11 values)              age   ->   (dropped)
xij variables:
                                     de   ->   de1 de2 ... de11
                                     ie   ->   ie1 ie2 ... ie11
                                     te   ->   te1 te2 ... te11
                                 mx2014   ->   mx20141 mx20142 ... mx201411
                                 mx2015   ->   mx20151 mx20152 ... mx201511
-----------------------------------------------------------------------------

. 
. foreach var of newlist de ie te {
  2.         egen `var'12 = rsum(`var'*) // sum across age groups
  3.         }

. 
. * reshape dataset back to long
. reshape long de ie te mx2014 mx2015, i(sex race) j(age)
(note: j = 1 2 3 4 5 6 7 8 9 10 11 12)
(note: mx201412 not found)
(note: mx201512 not found)

Data                               wide   ->   long
-----------------------------------------------------------------------------
Number of obs.                        4   ->      48
Number of variables                  60   ->       8
j variable (12 values)                    ->   age
xij variables:
                       de1 de2 ... de12   ->   de
                       ie1 ie2 ... ie12   ->   ie
                       te1 te2 ... te12   ->   te
           mx20141 mx20142 ... mx201412   ->   mx2014
           mx20151 mx20152 ... mx201512   ->   mx2015
-----------------------------------------------------------------------------

. 
. * total across all age groups
. label define age 12 "Total", add

. label values age age

. 
. * proportional contribution
. sort sex race age

. bysort sex race: gen pctgap=te[_n] / te[12]

. 
. table age race, c(sum te) by(sex)

----------------------------------
Gender and  |
Ten-Year    |         Race        
Age Groups  |  NH Black   NH White
------------+---------------------
Female      |
00-01 years |  .0093709  -.0043533
01-04 years |  .0003715  -.0002678
05-14 years |  .0056844   .0097367
15-24 years |  .0228072   .0160587
25-34 years |  .0216766   .0285853
35-44 years | -.0056774   .0080477
45-54 years | -.0229025   .0065916
55-64 years | -.0047271   .0174106
65-74 years | -.0141628   .0133172
75-84 years | -.0163499   .0158984
  85+ years |  .0338389   .1077214
      Total |  .0299298   .2187463
------------+---------------------
Male        |
00-01 years |   .021518  -.0067571
01-04 years |  .0102328   .0017596
05-14 years |  .0012533   .0018003
15-24 years |    .08782   .0158011
25-34 years |  .0750688   .0572501
35-44 years |  .0665295   .0409182
45-54 years |  .0221081  -.0030442
55-64 years |  .0103733   .0153201
65-74 years | -.0018429   .0140596
75-84 years |  .0209676   .0013538
  85+ years | -.0284955   .0368183
      Total |   .285533   .1752798
----------------------------------

. table age race, c(sum pctgap) by(sex)

----------------------------------
Gender and  |
Ten-Year    |         Race        
Age Groups  |  NH Black   NH White
------------+---------------------
Female      |
00-01 years |  .3130955  -.0199012
01-04 years |  .0124109  -.0012241
05-14 years |  .1899241   .0445112
15-24 years |  .7620223   .0734125
25-34 years |  .7242482   .1306777
35-44 years |   -.18969     .03679
45-54 years | -.7652052   .0301334
55-64 years | -.1579405   .0795925
65-74 years | -.4731991   .0608797
75-84 years | -.5462738   .0726794
  85+ years |  1.130608    .492449
      Total |         1          1
------------+---------------------
Male        |
00-01 years |  .0753608  -.0385506
01-04 years |  .0358375   .0100388
05-14 years |  .0043894   .0102708
15-24 years |  .3075652   .0901479
25-34 years |  .2629075   .3266211
35-44 years |  .2330011    .233445
45-54 years |  .0774275  -.0173674
55-64 years |  .0363295   .0874039
65-74 years | -.0064541   .0802125
75-84 years |  .0734333   .0077235
  85+ years | -.0997977   .2100547
      Total |         1          1
----------------------------------

. 
. 
. egen class=group(sex race)

. label define class 1 "Non-Hispanic Black Women" ///
>   2 "Non-Hispanic White Women" 3 "Non-Hispanic Black Men" ///
>   4 "Non-Hispanic White Men", modify

. label values class class

. 
. * save this as a dataset for plotting in R
. saveold age-race, replace version(12)
(saving in Stata 12 format, which can be read by Stata 11 or 12)
file age-race.dta saved

. 
. 
. // #6
. // estimate cause-specific proportion of deaths
. 
. * load the mortality data
. use usa-decomp-2015-race, clear

. 
. * calculate proportion of deaths for each cause by sex, race, age and year
. rename cod14 cod

. drop pop rate

. reshape wide count, i(sex race age cod) j(year)
(note: j = 2014 2015)

Data                               long   ->   wide
-----------------------------------------------------------------------------
Number of obs.                     1232   ->     616
Number of variables                   6   ->       6
j variable (2 values)              year   ->   (dropped)
xij variables:
                                  count   ->   count2014 count2015
-----------------------------------------------------------------------------

. 
. * now reshape wide again to get deaths by cause as variables
. reshape wide count2014 count2015, i(sex race age) j(cod)
(note: j = 1 2 3 4 5 6 7 8 9 10 11 12 13 14)

Data                               long   ->   wide
-----------------------------------------------------------------------------
Number of obs.                      616   ->      44
Number of variables                   6   ->      31
j variable (14 values)              cod   ->   (dropped)
xij variables:
                              count2014   ->   count20141 count20142 ... count20
> 1414
                              count2015   ->   count20151 count20152 ... count20
> 1514
-----------------------------------------------------------------------------

. 
. * total deaths for each group
. foreach v of numlist 2014 2015 {
  2.   egen tdeaths`v' = rsum(count`v'*)
  3. }

. 
. * proportion of deaths for each cause, by age, year, race
. forvalues i=1/14 {
  2.   gen pdeaths2014`i' = count2014`i' / tdeaths2014
  3.   gen pdeaths2015`i' = count2015`i' / tdeaths2015
  4. }

. 
. * save dataset for merging with age-decompositions
. save cod-race, replace
file cod-race.dta saved

. 
. 
. // #7
. // now decomposition by age and cause of death
. 
. * load the age decomposition
. use age-race, clear

. drop if age==12 // drop total for all ages
(4 observations deleted)

. 
. * merge with proportion of deaths by cause
. merge 1:1 sex race age using cod-race
(note: variable age was byte, now long to accommodate using data's values)
(label sex already defined)
(label race already defined)
(label age already defined)

    Result                           # of obs.
    -----------------------------------------
    not matched                             0
    matched                                44  (_merge==3)
    -----------------------------------------

. drop _merge

. 
. /* formula for partitioning of each age group component by cause of death
>   from Arriaga EE. Changing trends in mortality decline during the last
>   decades. In Ruzicka et al. Differential mortality: Methodological issues 
>   and biosocial factors. 1989;p. 105–29.*/
. 
. local i = 1

. while `i' < 15 {
  2.         gen cause`i' = te* (((mx2014*pdeaths2014`i') - (mx2015*pdeaths2015`
> i')) ///
>         / (mx2014-mx2015))
  3.         local ++i
  4. }

. 
. * drop proportions of deaths by cause
. drop count* pdeaths*

. 
. * reshape long by cause
. reshape long cause, i(sex race age) j(cod)
(note: j = 1 2 3 4 5 6 7 8 9 10 11 12 13 14)

Data                               wide   ->   long
-----------------------------------------------------------------------------
Number of obs.                       44   ->     616
Number of variables                  26   ->      14
j variable (14 values)                    ->   cod
xij variables:
              cause1 cause2 ... cause14   ->   cause
-----------------------------------------------------------------------------

. 
. rename cause cont

. label var cont "contribution to LE gap"

. 
. 
. * proportional contribution to change in the gap
. egen gap=total(cont), by(sex race)

. gen pctgapc=cont/gap

. 
. table cod race, contents(sum pctgapc) row by(sex)

--------------------------------------
Gender and      |
RECODE of       |         Race        
cod113          |  NH Black   NH White
----------------+---------------------
Female          |
           CVDs |   .508943   .3563683
        Cancers | -2.615492  -.1890892
       Diabetes | -.6678039    .033651
    Alzheimer's |  3.308956   .3487302
  Flu/pneumonia | -.4421002   .0041633
            HIV | -.3034475  -.0041996
Chronic Resp dx |  .3545524   .1470934
       Liver dx |  .3165944   .0591406
      Kidney dx |  .3214861   .0113396
     MV crashes |  .5639225   .0519621
      Poisoning |  .4814439    .172024
        Suicide |   .087865   .0537825
       Homicide |  .4016594   .0208387
       Residual | -1.316579  -.0658047
                | 
          Total |  .9999999          1
----------------+---------------------
Male            |
           CVDs |  .0349679   .1158496
        Cancers | -.3095186  -.2936087
       Diabetes |  .0665861   .0365296
    Alzheimer's |  .0843396   .1954617
  Flu/pneumonia | -.0036826  -.0503731
            HIV |  -.056396  -.0195321
Chronic Resp dx |  .0410062   .0559679
       Liver dx |  .0018613   .0605647
      Kidney dx |  .0672608   .0046528
     MV crashes |  .1620514   .1068359
      Poisoning |  .2252064   .4954221
        Suicide |  .0250947   .1221989
       Homicide |  .5653554   .0440463
       Residual |  .0958674   .1259843
                | 
          Total |         1          1
--------------------------------------

. 
. collapse (sum) cont, by(sex race cod)

. 
. reshape wide cont, i(sex race) j(cod)
(note: j = 1 2 3 4 5 6 7 8 9 10 11 12 13 14)

Data                               long   ->   wide
-----------------------------------------------------------------------------
Number of obs.                       56   ->       4
Number of variables                   4   ->      16
j variable (14 values)              cod   ->   (dropped)
xij variables:
                                   cont   ->   cont1 cont2 ... cont14
-----------------------------------------------------------------------------

. egen cont15 = rsum(cont*)

. reshape long cont, i(sex race) j(cod)
(note: j = 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)

Data                               wide   ->   long
-----------------------------------------------------------------------------
Number of obs.                        4   ->      60
Number of variables                  17   ->       4
j variable (15 values)                    ->   cod
xij variables:
                 cont1 cont2 ... cont15   ->   cont
-----------------------------------------------------------------------------

. 
. * add "Total" to cod14, the value label created by recode's gen(cod14);
. * the variable was renamed to cod but the label kept its name, so a label
. * called cod would never be shown. NOTE(review): confirm via -label list-
. label define cod14 15 "Total", add

. label values cod cod14

. 
. * proportional contribution
. sort sex race cod

. bysort sex race: gen pctgapc=cont[_n] / cont[15]

. 
. egen class=group(sex race)

. label define class 1 "Non-Hispanic Black Women" ///
>   2 "Non-Hispanic White Women" 3 "Non-Hispanic Black Men" ///
>   4 "Non-Hispanic White Men", modify

. label values class class

. 
. * save dataset for plotting in R
. saveold cod-race-plots, replace version(12)
(saving in Stata 12 format, which can be read by Stata 11 or 12)
file cod-race-plots.dta saved

. 
. 
. log close
      name:  <unnamed>
       log:  /Users/samharper/OneDrive - McGill University/usa-decomp-2015-race.
> log
  log type:  text
 closed on:  16 Feb 2017, 11:27:08
--------------------------------------------------------------------------------
